Data source: https://archive.ics.uci.edu/ml/datasets/Beijing+Multi-Site+Air-Quality+Data
The data set includes hourly air pollutants data from 12 nationally-controlled air-quality monitoring sites. The air-quality data are from the Beijing Municipal Environmental Monitoring Center. The meteorological data in each air-quality site are matched with the nearest weather station from the China Meteorological Administration. The time period is from March 1st, 2013 to February 28th, 2017. Missing data are denoted as NA.
Attribute Information:
No: row number
year: year of data in this row
month: month of data in this row
day: day of data in this row
hour: hour of data in this row
PM2.5: PM2.5 concentration (ug/m^3)
PM10: PM10 concentration (ug/m^3)
SO2: SO2 concentration (ug/m^3)
NO2: NO2 concentration (ug/m^3)
CO: CO concentration (ug/m^3)
O3: O3 concentration (ug/m^3)
TEMP: temperature (degree Celsius)
PRES: pressure (hPa)
DEWP: dew point temperature (degree Celsius)
RAIN: precipitation (mm)
wd: wind direction
WSPM: wind speed (m/s)
station: name of the air-quality monitoring site
library(corrplot)
## corrplot 0.92 loaded
library(RColorBrewer)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(seasonal)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(dplyr)
library(TSstudio)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:data.table':
##
## first, last
## The following objects are masked from 'package:dplyr':
##
## first, last
library(tseries)
library(ggplot2)
data <- read.csv("PRSA_Data_Wanshouxigong_20130301-20170228.csv", header = TRUE)
We are going to study the air pollution condition in Beijing, Wanshou xigong.
attach(data)
sum(is.na(data))
## [1] 5146
column_nane_list <- colnames(data)
for (a in column_nane_list){
print(a)
data <-data %>% fill(a,.direction = 'updown')
}
## [1] "No"
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(a)
##
## # Now:
## data %>% select(all_of(a))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## [1] "year"
## [1] "month"
## [1] "day"
## [1] "hour"
## [1] "PM2.5"
## [1] "PM10"
## [1] "SO2"
## [1] "NO2"
## [1] "CO"
## [1] "O3"
## [1] "TEMP"
## [1] "PRES"
## [1] "DEWP"
## [1] "RAIN"
## [1] "wd"
## [1] "WSPM"
## [1] "station"
sum( apply(data,2, function(x) is.na(x)))
## [1] 0
We fill the missing value by the previous value.If the previous value is not available, we will fill by the next value.
There are two type of information in the dataset. One is the diffrent index of the air pollutants (e.g. PM2.5, PM10, SO2, NO2, CO, O3). And the other type is the weather condition (e.g. TEMP, PRES, DEWP, RAIN, wd, WSPM).
grouped_year = aggregate(cbind(PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES,DEWP,RAIN,WSPM) ~ data$year, data = data, FUN = mean, na.rm = TRUE)
grouped_month = aggregate(cbind(PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES, DEWP,RAIN,WSPM) ~ data$year+ data$month, data = data, FUN = mean, na.rm = TRUE)
grouped_day = aggregate(cbind(PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES, DEWP,RAIN,WSPM) ~ data$year+ data$month + data$day, data = data, FUN = mean, na.rm = TRUE)
grouped_year <- grouped_year[
order( grouped_year[,1] ),
]
grouped_month <- grouped_month[
order( grouped_month[,1], grouped_month[,2] ),
]
grouped_day <- grouped_day[
order( grouped_day[,1], grouped_day[,2], grouped_day[,3] ),
]
corr_month <- cor(grouped_month[,-c(1:2)])
corrplot(corr_month, type="upper", order="hclust",
col=brewer.pal(n=8, name="RdYlBu"))
We have found that the air pollutants have high correlation between each other (e.g. PM2.5, PM10, CO, SO2 and NO2 has positive correlation, but O3 has the negativity correlation with other air pollutants.)
And we also have found that the weather conditions also have high correlation with the air pollutants (e.g temperature, precipitation has negativity correlation with air pollutant(PM2.5,PM10, CO, SO2, NO2))
grouped_month <- grouped_month [,-c(1:2)]
ts_by_month <- ts(grouped_month , start = c(2013, 3), frequency = 12)
plot.ts(ts_by_month[,1:6])
plot.ts(ts_by_month[,7:11])
grouped_day <- grouped_day [,-c(1:3)]
dates_by_day <- seq(as.Date("2013-03-01"),length = nrow(grouped_day), by ="day")
ts_by_day <- xts(grouped_day,order.by = dates_by_day, frequency = "daily")
ts_heatmap(ts_by_day[,'PM2.5'],title = " Heatmap - the PM2.5 concentration in Wanshou xigong")
dates_by_month <- seq(as.Date("2013-03-01"),length = nrow(grouped_month), by ="month")
xts_by_month <- xts(grouped_month, order.by = dates_by_month , frequency = 12)
#
# plot.xts(xts_by_month[,1:5], multi.panel = TRUE)
#
# plot.xts(xts_by_month[,11])
# ts_by_day <- xts(grouped_day, order.by = dates, frequency = 12)
# cycle(ts_by_month)
boxplot(ts_by_month [,'PM2.5'] ~ cycle(ts_by_month [,'PM2.5']))
boxplot(ts_by_month [,'PM10'] ~ cycle(ts_by_month [,'PM10']))
boxplot(ts_by_month [,'SO2'] ~ cycle(ts_by_month [,'SO2']))
boxplot(ts_by_month [,'NO2'] ~ cycle(ts_by_month [,'NO2']))
boxplot(ts_by_month [,'CO'] ~ cycle(ts_by_month [,'CO']))
boxplot(ts_by_month [,'O3'] ~ cycle(ts_by_month [,'O3']))
boxplot(ts_by_month [,'TEMP'] ~ cycle(ts_by_month [,'TEMP']))
boxplot(ts_by_month [,'DEWP'] ~ cycle(ts_by_month [,'DEWP']))
boxplot(ts_by_month [,'PRES'] ~ cycle(ts_by_month [,'PRES']))
boxplot(ts_by_month [,'RAIN'] ~ cycle(ts_by_month [,'RAIN']))
boxplot(ts_by_month [,'WSPM'] ~ cycle(ts_by_month [,'WSPM']))
We can observe the distribution of difference variable among month
# additive decomposition
add_decomp_PM25 <- decompose(ts_by_month[,'PM2.5'], type = 'additive')
add_decomp<- decompose(ts_by_month, type = 'additive')
add_decomp_PM25
## $x
## Jan Feb Mar Apr May Jun Jul
## 2013 106.69892 78.82778 81.71640 105.67153 68.14651
## 2014 113.49194 156.06399 98.66667 86.64444 59.18804 58.03889 85.52151
## 2015 103.28199 102.95833 87.32258 71.85556 55.34543 59.44583 61.57124
## 2016 75.26210 48.35201 95.18952 67.05417 55.25806 64.08889 74.10753
## 2017 132.08333 77.61905
## Aug Sep Oct Nov Dec
## 2013 59.41532 61.74306 98.97849 84.30972 92.30914
## 2014 64.41331 70.99903 114.25806 103.31708 67.94247
## 2015 45.68548 53.15000 77.78226 125.42639 165.81989
## 2016 50.55242 59.25139 83.71640 106.37361 156.83468
## 2017
##
## $seasonal
## Jan Feb Mar Apr May Jun
## 2013 12.596727 -5.698226 -24.380240 -21.655511
## 2014 16.017806 21.170881 12.596727 -5.698226 -24.380240 -21.655511
## 2015 16.017806 21.170881 12.596727 -5.698226 -24.380240 -21.655511
## 2016 16.017806 21.170881 12.596727 -5.698226 -24.380240 -21.655511
## 2017 16.017806 21.170881
## Jul Aug Sep Oct Nov Dec
## 2013 -9.601027 -28.952748 -21.657639 13.707981 21.583772 26.868223
## 2014 -9.601027 -28.952748 -21.657639 13.707981 21.583772 26.868223
## 2015 -9.601027 -28.952748 -21.657639 13.707981 21.583772 26.868223
## 2016 -9.601027 -28.952748 -21.657639 13.707981 21.583772 26.868223
## 2017
##
## $trend
## Jan Feb Mar Apr May Jun Jul Aug
## 2013 NA NA NA NA NA NA
## 2014 87.14031 88.07252 88.66643 89.68875 91.11737 90.89406 89.45337 86.81522
## 2015 81.22384 79.44558 77.92155 75.65801 75.05941 80.05886 82.96959 79.52683
## 2016 78.40901 79.13415 79.59116 80.09264 79.54603 78.37778 80.37095 83.95796
## 2017 NA NA
## Sep Oct Nov Dec
## 2013 91.94639 91.93741 91.32442 88.40104
## 2014 84.12981 83.04094 82.26463 82.16314
## 2015 77.57935 77.70708 77.50339 77.69321
## 2016 NA NA NA NA
## 2017
##
## $random
## Jan Feb Mar Apr May
## 2013 NA NA NA
## 2014 10.33382068 46.82059124 -2.59649089 2.65392485 -7.54908995
## 2015 6.04034552 2.34186903 -3.19569409 1.89576779 4.66626068
## 2016 -19.16472204 -51.95301612 3.00162914 -7.34024848 0.09227343
## 2017 NA NA
## Jun Jul Aug Sep Oct
## 2013 NA NA NA -8.54569396 -6.66689181
## 2014 -11.19966299 5.66916168 6.55083331 8.52685202 17.50914281
## 2015 1.04248834 -11.79732274 -4.88859599 -2.77171390 -13.63280684
## 2016 7.36661881 3.33760522 -4.45279316 NA NA
## 2017
## Nov Dec
## 2013 -28.59846862 -22.96012658
## 2014 -0.53131766 -41.08889239
## 2015 26.33923044 61.25846313
## 2016 NA NA
## 2017
##
## $figure
## [1] 12.596727 -5.698226 -24.380240 -21.655511 -9.601027 -28.952748
## [7] -21.657639 13.707981 21.583772 26.868223 16.017806 21.170881
##
## $type
## [1] "additive"
##
## attr(,"class")
## [1] "decomposed.ts"
plot(add_decomp_PM25)
# multiplicative decomposition
multi_decomp_PM25 <- decompose(ts_by_month[,'PM2.5'], type = 'multiplicative')
multi_decomp <- decompose(ts_by_month, type = 'multiplicative')
multi_decomp_PM25
## $x
## Jan Feb Mar Apr May Jun Jul
## 2013 106.69892 78.82778 81.71640 105.67153 68.14651
## 2014 113.49194 156.06399 98.66667 86.64444 59.18804 58.03889 85.52151
## 2015 103.28199 102.95833 87.32258 71.85556 55.34543 59.44583 61.57124
## 2016 75.26210 48.35201 95.18952 67.05417 55.25806 64.08889 74.10753
## 2017 132.08333 77.61905
## Aug Sep Oct Nov Dec
## 2013 59.41532 61.74306 98.97849 84.30972 92.30914
## 2014 64.41331 70.99903 114.25806 103.31708 67.94247
## 2015 45.68548 53.15000 77.78226 125.42639 165.81989
## 2016 50.55242 59.25139 83.71640 106.37361 156.83468
## 2017
##
## $seasonal
## Jan Feb Mar Apr May Jun Jul
## 2013 1.1536745 0.9261272 0.7002633 0.7396729 0.8814532
## 2014 1.1888050 1.2376261 1.1536745 0.9261272 0.7002633 0.7396729 0.8814532
## 2015 1.1888050 1.2376261 1.1536745 0.9261272 0.7002633 0.7396729 0.8814532
## 2016 1.1888050 1.2376261 1.1536745 0.9261272 0.7002633 0.7396729 0.8814532
## 2017 1.1888050 1.2376261
## Aug Sep Oct Nov Dec
## 2013 0.6454082 0.7402740 1.1617698 1.2774791 1.3474468
## 2014 0.6454082 0.7402740 1.1617698 1.2774791 1.3474468
## 2015 0.6454082 0.7402740 1.1617698 1.2774791 1.3474468
## 2016 0.6454082 0.7402740 1.1617698 1.2774791 1.3474468
## 2017
##
## $trend
## Jan Feb Mar Apr May Jun Jul Aug
## 2013 NA NA NA NA NA NA
## 2014 87.14031 88.07252 88.66643 89.68875 91.11737 90.89406 89.45337 86.81522
## 2015 81.22384 79.44558 77.92155 75.65801 75.05941 80.05886 82.96959 79.52683
## 2016 78.40901 79.13415 79.59116 80.09264 79.54603 78.37778 80.37095 83.95796
## 2017 NA NA
## Sep Oct Nov Dec
## 2013 91.94639 91.93741 91.32442 88.40104
## 2014 84.12981 83.04094 82.26463 82.16314
## 2015 77.57935 77.70708 77.50339 77.69321
## 2016 NA NA NA NA
## 2017
##
## $random
## Jan Feb Mar Apr May Jun Jul
## 2013 NA NA NA NA NA
## 2014 1.0955578 1.4317686 0.9645571 1.0431149 0.9276228 0.8632645 1.0846244
## 2015 1.0696223 1.0471341 0.9713723 1.0254979 1.0529682 1.0038581 0.8418984
## 2016 0.8074204 0.4936978 1.0366711 0.9039877 0.9920094 1.1054779 1.0460777
## 2017 NA NA
## Aug Sep Oct Nov Dec
## 2013 NA 0.9071121 0.9266773 0.7226649 0.7749536
## 2014 1.1495959 1.1400135 1.1843349 0.9831170 0.6136951
## 2015 0.8900821 0.9254749 0.8615884 1.2668186 1.5839518
## 2016 0.9329225 NA NA NA NA
## 2017
##
## $figure
## [1] 1.1536745 0.9261272 0.7002633 0.7396729 0.8814532 0.6454082 0.7402740
## [8] 1.1617698 1.2774791 1.3474468 1.1888050 1.2376261
##
## $type
## [1] "multiplicative"
##
## attr(,"class")
## [1] "decomposed.ts"
plot(multi_decomp_PM25)
#
# plot(add_decomp$x[,'PM2.5'])
# plot(add_decomp$seasonal)
# # plot(add_decomp$trend)
# # plot(add_decomp$random)
#
add_decomp_PM25$trend
## Jan Feb Mar Apr May Jun Jul Aug
## 2013 NA NA NA NA NA NA
## 2014 87.14031 88.07252 88.66643 89.68875 91.11737 90.89406 89.45337 86.81522
## 2015 81.22384 79.44558 77.92155 75.65801 75.05941 80.05886 82.96959 79.52683
## 2016 78.40901 79.13415 79.59116 80.09264 79.54603 78.37778 80.37095 83.95796
## 2017 NA NA
## Sep Oct Nov Dec
## 2013 91.94639 91.93741 91.32442 88.40104
## 2014 84.12981 83.04094 82.26463 82.16314
## 2015 77.57935 77.70708 77.50339 77.69321
## 2016 NA NA NA NA
## 2017
add_decomp$trend[,1]
## Jan Feb Mar Apr May Jun Jul Aug
## 2013 NA NA NA NA NA NA
## 2014 87.14031 88.07252 88.66643 89.68875 91.11737 90.89406 89.45337 86.81522
## 2015 81.22384 79.44558 77.92155 75.65801 75.05941 80.05886 82.96959 79.52683
## 2016 78.40901 79.13415 79.59116 80.09264 79.54603 78.37778 80.37095 83.95796
## 2017 NA NA
## Sep Oct Nov Dec
## 2013 91.94639 91.93741 91.32442 88.40104
## 2014 84.12981 83.04094 82.26463 82.16314
## 2015 77.57935 77.70708 77.50339 77.69321
## 2016 NA NA NA NA
## 2017
add_decomp_PM25$random
## Jan Feb Mar Apr May
## 2013 NA NA NA
## 2014 10.33382068 46.82059124 -2.59649089 2.65392485 -7.54908995
## 2015 6.04034552 2.34186903 -3.19569409 1.89576779 4.66626068
## 2016 -19.16472204 -51.95301612 3.00162914 -7.34024848 0.09227343
## 2017 NA NA
## Jun Jul Aug Sep Oct
## 2013 NA NA NA -8.54569396 -6.66689181
## 2014 -11.19966299 5.66916168 6.55083331 8.52685202 17.50914281
## 2015 1.04248834 -11.79732274 -4.88859599 -2.77171390 -13.63280684
## 2016 7.36661881 3.33760522 -4.45279316 NA NA
## 2017
## Nov Dec
## 2013 -28.59846862 -22.96012658
## 2014 -0.53131766 -41.08889239
## 2015 26.33923044 61.25846313
## 2016 NA NA
## 2017
add_decomp$random[,1]
## Jan Feb Mar Apr May Jun
## 2013 NA NA NA NA
## 2014 -28.960982 38.124721 6.001469 31.715562 14.891982 6.638761
## 2015 -33.254457 -6.354002 5.402266 30.957405 27.107332 18.880913
## 2016 -58.459524 -60.648887 11.599589 21.721389 22.533345 25.205043
## 2017 NA NA
## Jul Aug Sep Oct Nov Dec
## 2013 NA NA -5.349203 15.764394 -47.676030 -90.936947
## 2014 29.924432 13.773747 11.723343 39.940429 -19.608879 -109.065713
## 2015 12.457948 2.334318 0.424777 8.798479 7.261669 -6.718357
## 2016 27.592876 2.770121 NA NA NA NA
## 2017
# ts_by_month[,'PM2.5']
STL.PM2.5<- stl( ts_by_month[,'PM2.5'] , t.window = 13, s.window = 'periodic')
plot(STL.PM2.5)
STL.PM10<- stl( ts_by_month[,'PM10'] , t.window = 13, s.window = 'periodic')
plot(STL.PM10)
STL.NO2<- stl( ts_by_month[,'NO2'] , t.window = 13, s.window = 'periodic')
plot(STL.NO2)
STL.SO2<- stl( ts_by_month[,'NO2'] , t.window = 13, s.window = 'periodic')
plot(STL.SO2)
STL.CO<- stl( ts_by_month[,'CO'] , t.window = 13, s.window = 'periodic')
plot(STL.CO)
STL.O3<- stl( ts_by_month[,'O3'] , t.window = 13, s.window = 'periodic')
plot(STL.O3)
STL.TEMP<- stl( ts_by_month[,'TEMP'] , t.window = 13, s.window = 'periodic')
plot(STL.TEMP)
STL.PRES<- stl( ts_by_month[,'PRES'] , t.window = 13, s.window = 'periodic')
plot(STL.PRES)
STL.RAIN<- stl( ts_by_month[,'RAIN'] , t.window = 13, s.window = 'periodic')
plot(STL.RAIN)
auto.arima(remainder(STL.PM2.5))
## Series: remainder(STL.PM2.5)
## ARIMA(0,0,0) with zero mean
##
## sigma^2 = 357.8: log likelihood = -209.23
## AIC=420.46 AICc=420.55 BIC=422.33
x <- lapply(1:25, function(i){
p <- Box.test(remainder(STL.PM2.5), lag = i, type = "Ljung-Box")
output <- data.frame(lag = i, p_value = p$p.value)
return(output) }) %>% bind_rows
plot(x = x$lag,
y = x$p_value, ylim = c(0,1),
main = "Series white_noise (PM2.5) - Ljung-Box Test",
xlab = "Lag", ylab = "P-Value")
abline(h = 0.05, col="red", lwd=3, lty=2)
auto.arima(remainder(STL.PM10))
## Series: remainder(STL.PM10)
## ARIMA(2,0,1) with zero mean
##
## Coefficients:
## ar1 ar2 ma1
## 0.5851 -0.6317 -0.8254
## s.e. 0.1170 0.1137 0.0924
##
## sigma^2 = 180: log likelihood = -192.29
## AIC=392.57 AICc=393.5 BIC=400.06
x <- lapply(1:25, function(i){
p <- Box.test(remainder(STL.PM10), lag = i, type = "Ljung-Box")
output <- data.frame(lag = i, p_value = p$p.value)
return(output) }) %>% bind_rows
plot(x = x$lag,
y = x$p_value, ylim = c(0,1),
main = "Series white_noise(PM10) - Ljung-Box Test",
xlab = "Lag", ylab = "P-Value")
abline(h = 0.05, col="red", lwd=3, lty=2)
auto.arima(remainder(STL.SO2))
## Series: remainder(STL.SO2)
## ARIMA(0,0,0) with zero mean
##
## sigma^2 = 32.73: log likelihood = -151.83
## AIC=305.65 AICc=305.74 BIC=307.52
x <- lapply(1:25, function(i){
p <- Box.test(remainder(STL.SO2), lag = i, type = "Ljung-Box")
output <- data.frame(lag = i, p_value = p$p.value)
return(output) }) %>% bind_rows
plot(x = x$lag,
y = x$p_value, ylim = c(0,1),
main = "Series white_noise(SO2) - Ljung-Box Test",
xlab = "Lag", ylab = "P-Value")
abline(h = 0.05, col="red", lwd=3, lty=2)
auto.arima(remainder(STL.NO2))
## Series: remainder(STL.NO2)
## ARIMA(0,0,0) with zero mean
##
## sigma^2 = 32.73: log likelihood = -151.83
## AIC=305.65 AICc=305.74 BIC=307.52
x <- lapply(1:25, function(i){
p <- Box.test(remainder(STL.NO2), lag = i, type = "Ljung-Box")
output <- data.frame(lag = i, p_value = p$p.value)
return(output) }) %>% bind_rows
plot(x = x$lag,
y = x$p_value, ylim = c(0,1),
main = "Series white_noise(NO2) - Ljung-Box Test",
xlab = "Lag", ylab = "P-Value")
abline(h = 0.05, col="red", lwd=3, lty=2)
auto.arima(remainder(STL.CO))
## Series: remainder(STL.CO)
## ARIMA(0,0,0) with zero mean
##
## sigma^2 = 53698: log likelihood = -329.5
## AIC=660.99 AICc=661.08 BIC=662.86
x <- lapply(1:25, function(i){
p <- Box.test(remainder(STL.CO), lag = i, type = "Ljung-Box")
output <- data.frame(lag = i, p_value = p$p.value)
return(output) }) %>% bind_rows
plot(x = x$lag,
y = x$p_value, ylim = c(0,1),
main = "Series white_noise(CO) - Ljung-Box Test",
xlab = "Lag", ylab = "P-Value")
abline(h = 0.05, col="red", lwd=3, lty=2)
auto.arima(remainder(STL.O3))
## Series: remainder(STL.O3)
## ARIMA(0,0,0) with zero mean
##
## sigma^2 = 51.6: log likelihood = -162.76
## AIC=327.51 AICc=327.6 BIC=329.38
x <- lapply(1:25, function(i){
p <- Box.test(remainder(STL.O3), lag = i, type = "Ljung-Box")
output <- data.frame(lag = i, p_value = p$p.value)
return(output) }) %>% bind_rows
plot(x = x$lag,
y = x$p_value, ylim = c(0,1),
main = "Series white_noise(O3) - Ljung-Box Test",
xlab = "Lag", ylab = "P-Value")
abline(h = 0.05, col="red", lwd=3, lty=2)
auto.arima(remainder(STL.TEMP))
## Series: remainder(STL.TEMP)
## ARIMA(1,0,1) with zero mean
##
## Coefficients:
## ar1 ma1
## 0.3815 -0.8530
## s.e. 0.2102 0.1376
##
## sigma^2 = 0.5325: log likelihood = -52.3
## AIC=110.6 AICc=111.14 BIC=116.21
x <- lapply(1:25, function(i){
p <- Box.test(remainder(STL.TEMP), lag = i, type = "Ljung-Box")
output <- data.frame(lag = i, p_value = p$p.value)
return(output) }) %>% bind_rows
plot(x = x$lag,
y = x$p_value, ylim = c(0,1),
main = "Series white_noise(TEMP) - Ljung-Box Test",
xlab = "Lag", ylab = "P-Value")
abline(h = 0.05, col="red", lwd=3, lty=2)
auto.arima(remainder(STL.PRES))
## Series: remainder(STL.PRES)
## ARIMA(0,0,0) with zero mean
##
## sigma^2 = 1.942: log likelihood = -84.04
## AIC=170.09 AICc=170.17 BIC=171.96
x <- lapply(1:25, function(i){
p <- Box.test(remainder(STL.PRES), lag = i, type = "Ljung-Box")
output <- data.frame(lag = i, p_value = p$p.value)
return(output) }) %>% bind_rows
plot(x = x$lag,
y = x$p_value, ylim = c(0,1),
main = "Series white_noise(PRES) - Ljung-Box Test",
xlab = "Lag", ylab = "P-Value")
abline(h = 0.05, col="red", lwd=3, lty=2)
auto.arima(remainder(STL.RAIN))
## Series: remainder(STL.RAIN)
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## -0.5866 -0.2711
## s.e. 0.1379 0.1360
##
## sigma^2 = 0.001314: log likelihood = 91.96
## AIC=-177.91 AICc=-177.37 BIC=-172.3
x <- lapply(1:25, function(i){
p <- Box.test(remainder(STL.RAIN), lag = i, type = "Ljung-Box")
output <- data.frame(lag = i, p_value = p$p.value)
return(output) }) %>% bind_rows
plot(x = x$lag,
y = x$p_value, ylim = c(0,1),
main = "Series white_noise(RAIN) - Ljung-Box Test",
xlab = "Lag", ylab = "P-Value")
abline(h = 0.05, col="red", lwd=3, lty=2)
forecast(STL.PM2.5, h=5, method = 'naive')
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2017 77.18490 44.111134 110.2587 26.60294 127.7669
## Apr 2017 56.62231 9.848936 103.3957 -14.91139 128.1560
## May 2017 43.71513 -13.570317 101.0006 -43.89540 131.3257
## Jun 2017 53.04516 -13.102381 119.1927 -48.11877 154.2091
## Jul 2017 53.96631 -19.988884 127.9215 -59.13840 167.0710
STL.PM2.5 %>% forecast(method="naive", h=5) %>%
autoplot(ylab = 'PM2.5') + ggtitle('Forecasting')
ts_heatmap(ts_by_month[,'PM2.5'],title = " Heatmap - the PM2.5 concentration in Wanshou xigong")
forecast(STL.PM10, h=5, method = 'naive')
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2017 120.79043 88.517348 153.0635 71.43301 170.1479
## Apr 2017 93.15092 47.509882 138.7919 23.34898 162.9529
## May 2017 84.11630 28.217673 140.0149 -1.37327 169.6059
## Jun 2017 67.14942 2.603248 131.6956 -31.56543 165.8643
## Jul 2017 62.57884 -9.585969 134.7437 -47.78771 172.9454
STL.PM10 %>% forecast(method="naive", h=5) %>%
autoplot(ylab = 'PM10') + ggtitle('Forecasting')
ts_heatmap(ts_by_month[,'PM10'],title = " Heatmap - the PM10 concentration in Wanshou xigong")
forecast(STL.SO2, h=5, method = 'naive')
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2017 61.73933 51.45773 72.02093 46.014975 77.46368
## Apr 2017 48.03199 33.49161 62.57237 25.794394 70.26958
## May 2017 45.55294 27.74468 63.36120 18.317562 72.78832
## Jun 2017 43.79880 23.23559 64.36200 12.350091 75.24750
## Jul 2017 40.59675 17.60639 63.58712 5.436031 75.75748
STL.SO2 %>% forecast(method="naive", h=5) %>%
autoplot(ylab = 'SO2') + ggtitle('Forecasting')
ts_heatmap(ts_by_month[,'SO2'],title = " Heatmap - the SO2 concentration in Wanshou xigong")
forecast(STL.NO2, h=5, method = 'naive')
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2017 61.73933 51.45773 72.02093 46.014975 77.46368
## Apr 2017 48.03199 33.49161 62.57237 25.794394 70.26958
## May 2017 45.55294 27.74468 63.36120 18.317562 72.78832
## Jun 2017 43.79880 23.23559 64.36200 12.350091 75.24750
## Jul 2017 40.59675 17.60639 63.58712 5.436031 75.75748
STL.NO2 %>% forecast(method="naive", h=5) %>%
autoplot(ylab = 'NO2') + ggtitle('Forecasting')
ts_heatmap(ts_by_month[,'NO2'],title = " Heatmap - the NO2 concentration in Wanshou xigong")
forecast(STL.CO, h=5, method = 'naive')
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2017 973.1480 572.96264 1373.333 361.1173 1585.179
## Apr 2017 541.0066 -24.94091 1106.954 -324.5354 1406.549
## May 2017 493.1916 -199.94972 1186.333 -566.8766 1553.260
## Jun 2017 705.9207 -94.44995 1506.291 -518.1406 1929.982
## Jul 2017 653.5837 -241.25789 1548.425 -714.9584 2022.126
STL.CO %>% forecast(method="naive", h=5) %>%
autoplot(ylab = 'CO') + ggtitle('Forecasting')
ts_heatmap(ts_by_month[,'CO'],title = " Heatmap - the CO concentration in Wanshou xigong")
forecast(STL.O3, h=5, method = 'naive')
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2017 57.51380 45.54433 69.48328 39.20807 75.81954
## Apr 2017 74.45243 57.52503 91.37983 48.56420 100.34065
## May 2017 96.37771 75.64596 117.10945 64.67124 128.08418
## Jun 2017 104.09568 80.15672 128.03464 67.48420 140.70716
## Jul 2017 100.94909 74.18452 127.71366 60.01621 141.88196
STL.O3 %>% forecast(method="naive", h=5) %>%
autoplot(ylab = 'O3') + ggtitle('Forecasting')
ts_heatmap(ts_by_month[,'O3'],title = " Heatmap - the O3 concentration in Wanshou xigong")
forecast(STL.TEMP, h=5, method = 'naive')
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2017 10.39841 8.668394 12.12842 7.752579 13.04424
## Apr 2017 17.13377 14.687157 19.58038 13.391999 20.87554
## May 2017 23.26267 20.266196 26.25914 18.679959 27.84538
## Jun 2017 26.24717 22.787142 29.70720 20.955513 31.53883
## Jul 2017 28.83308 24.964650 32.70151 22.916826 34.74934
STL.TEMP %>% forecast(method="naive", h=5) %>%
autoplot(ylab = 'TEMP') + ggtitle('Forecasting')
ts_heatmap(ts_by_month[,'TEMP'],title = " Heatmap - the TEMP concentration in Wanshou xigong")
forecast(STL.PRES, h=5, method = 'naive')
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2017 1016.220 1013.4819 1018.959 1012.0323 1020.408
## Apr 2017 1010.481 1006.6084 1014.354 1004.5584 1016.404
## May 2017 1004.456 999.7135 1009.199 997.2028 1011.710
## Jun 2017 1001.340 995.8635 1006.817 992.9643 1009.716
## Jul 2017 1000.111 993.9874 1006.234 990.7461 1009.475
STL.PRES %>% forecast(method="naive", h=5) %>%
autoplot(ylab = 'PRES') + ggtitle('Forecasting')
ts_heatmap(ts_by_month[,'PRES'],title = " Heatmap - the PRES concentration in Wanshou xigong")
forecast(STL.RAIN, h=5, method = 'naive')
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2017 0.001549438 -0.09152873 0.09462761 -0.14080134 0.1439002
## Apr 2017 0.016914377 -0.11471803 0.14854679 -0.18440002 0.2182288
## May 2017 0.041943287 -0.11927283 0.20315941 -0.20461549 0.2885021
## Jun 2017 0.126730572 -0.05942577 0.31288691 -0.15797098 0.4114321
## Jul 2017 0.250805358 0.04267624 0.45893448 -0.06750066 0.5691114
STL.RAIN %>% forecast(method="naive", h=5) %>%
autoplot(ylab = 'RAIN') + ggtitle('Forecasting')
ts_heatmap(ts_by_month[,'RAIN'],title = " Heatmap - the RAIN concentration in Wanshou xigong")
convert_date_to_weekday <- function(day, month, year){
date <- as.POSIXlt(paste(as.character(year) , as.character(month), as.character(day), sep="-"), tz = "UTC")
weekday <- weekdays(date)
return (weekday)
}
convert_date_to_weekday(25,11,2022)
## [1] "Friday"
data_weekday <- data
data_weekday$weekday <- mapply(convert_date_to_weekday, data$day, data$month, data$year)
grouped_weekday_year = aggregate(cbind(PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES, DEWP,RAIN,WSPM) ~ year + weekday , data = data_weekday, FUN = mean, na.rm = TRUE)
grouped_weekday_year$weekday <- factor(grouped_weekday_year$weekday, levels = c("Monday", "Tuesday","Wednesday","Thursday", "Friday","Saturday","Sunday"))
weekdayorder = c("Monday", "Tuesday","Wednesday","Thursday", "Friday","Saturday","Sunday")
grouped_weekday_year = grouped_weekday_year[order(match(grouped_weekday_year$weekday,weekdayorder)),]
library(lattice)
xyplot( PM2.5 ~ weekday ,
type="b",
group=year,
data = grouped_weekday_year,
auto.key = TRUE,
)
xyplot( CO ~ weekday ,
type="b",
group=year,
data = grouped_weekday_year,
auto.key = TRUE,
)
xyplot( TEMP ~ weekday ,
type="b",
group=year,
data = grouped_weekday_year,
auto.key = TRUE,
)
xyplot( NO2 ~ weekday ,
type="b",
group=year,
data = grouped_weekday_year,
auto.key = TRUE,
)
grouped_hour_year = aggregate(cbind(PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES, DEWP,RAIN,WSPM) ~ year + hour , data = data, FUN = mean, na.rm = TRUE)
grouped_hour = aggregate(cbind(PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES, DEWP,RAIN,WSPM) ~ hour , data = data, FUN = mean, na.rm = TRUE)
grouped_hour_median = aggregate(cbind(PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES, DEWP,RAIN,WSPM) ~ hour , data = data, FUN = median, na.rm = TRUE)
library(lattice)
xyplot( PM2.5 ~ hour ,
type="b",
group=(year),
data = grouped_hour_year,
auto.key = TRUE,
)
xyplot( PM2.5 ~ hour ,
type="b",
data = grouped_hour,
auto.key = TRUE,
)
xyplot( PM2.5 ~ hour ,
type="b",
data = grouped_hour_median,
auto.key = TRUE,
)
xyplot( TEMP ~ hour ,
type="b",
group=(year),
data = grouped_hour_year,
auto.key = TRUE,
)
ts_by_hour <- ts(data, frequency = 24)
ts.plot(ts_by_hour)
time_index <- seq(from = as.POSIXct("2013-03-01 00:00"),
to = as.POSIXct("2017-2-28 23:00"), by = "hour")
xts_by_hour <- xts(data[, c("PM2.5","PM10","SO2","NO2","CO","O3","TEMP","PRES", "DEWP","RAIN","WSPM")], order.by = time_index, frequency = 24)
attr(xts_by_hour, 'frequency')<- 24
plot.xts(xts_by_hour[,"PM2.5"])
boxplot(xts_by_hour [,'PM2.5'] ~ cycle(xts_by_hour[,'PM2.5']))
decpmpose_hours <-decompose(as.ts(xts_by_hour[,'PM2.5']))
plot(decpmpose_hours)